{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read files "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cudf as gd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = '/rapids/notebooks/srabhi/champs-2019/input/csv/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4658147, 6) (2505542, 5)\n",
      "(2358657, 6)\n",
      "CPU times: user 1.05 s, sys: 1.04 s, total: 2.08 s\n",
      "Wall time: 4.3 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "train = gd.read_csv('%s/train.csv'%path)\n",
    "test = gd.read_csv('%s/test.csv'%path)\n",
    "print(train.shape,test.shape)\n",
    "for col in train.columns:\n",
    "    if train[col].dtype!='O':\n",
    "        train[col] = train[col].astype('float32')\n",
    "        if col in test.columns:\n",
    "            test[col] = test[col].astype('float32')\n",
    "struct = gd.read_csv('%s/structures.csv'%path)\n",
    "struct.head().to_pandas()\n",
    "for col in struct.columns:\n",
    "    if struct[col].dtype!='O':\n",
    "        struct[col] = struct[col].astype('float32')\n",
    "print(struct.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "contribs = gd.read_csv('%s/scalar_coupling_contributions.csv'%path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>atom_index_0</th>\n",
       "      <th>atom_index_1</th>\n",
       "      <th>type</th>\n",
       "      <th>fc</th>\n",
       "      <th>sd</th>\n",
       "      <th>pso</th>\n",
       "      <th>dso</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1JHC</td>\n",
       "      <td>83.0224</td>\n",
       "      <td>0.254579</td>\n",
       "      <td>1.25862</td>\n",
       "      <td>0.272010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2JHH</td>\n",
       "      <td>-11.0347</td>\n",
       "      <td>0.352978</td>\n",
       "      <td>2.85839</td>\n",
       "      <td>-3.433600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2JHH</td>\n",
       "      <td>-11.0325</td>\n",
       "      <td>0.352944</td>\n",
       "      <td>2.85852</td>\n",
       "      <td>-3.433870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2JHH</td>\n",
       "      <td>-11.0319</td>\n",
       "      <td>0.352934</td>\n",
       "      <td>2.85855</td>\n",
       "      <td>-3.433930</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1JHC</td>\n",
       "      <td>83.0222</td>\n",
       "      <td>0.254585</td>\n",
       "      <td>1.25861</td>\n",
       "      <td>0.272013</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      molecule_name  atom_index_0  atom_index_1  type       fc        sd  \\\n",
       "0  dsgdb9nsd_000001             1             0  1JHC  83.0224  0.254579   \n",
       "1  dsgdb9nsd_000001             1             2  2JHH -11.0347  0.352978   \n",
       "2  dsgdb9nsd_000001             1             3  2JHH -11.0325  0.352944   \n",
       "3  dsgdb9nsd_000001             1             4  2JHH -11.0319  0.352934   \n",
       "4  dsgdb9nsd_000001             2             0  1JHC  83.0222  0.254585   \n",
       "\n",
       "       pso       dso  \n",
       "0  1.25862  0.272010  \n",
       "1  2.85839 -3.433600  \n",
       "2  2.85852 -3.433870  \n",
       "3  2.85855 -3.433930  \n",
       "4  1.25861  0.272013  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "contribs.head().to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>atom_index_0</th>\n",
       "      <th>atom_index_1</th>\n",
       "      <th>type</th>\n",
       "      <th>scalar_coupling_constant</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1JHC</td>\n",
       "      <td>84.807602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2JHH</td>\n",
       "      <td>-11.257000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2.0</td>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2JHH</td>\n",
       "      <td>-11.254800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2JHH</td>\n",
       "      <td>-11.254300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1JHC</td>\n",
       "      <td>84.807404</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    id     molecule_name  atom_index_0  atom_index_1  type  \\\n",
       "0  0.0  dsgdb9nsd_000001           1.0           0.0  1JHC   \n",
       "1  1.0  dsgdb9nsd_000001           1.0           2.0  2JHH   \n",
       "2  2.0  dsgdb9nsd_000001           1.0           3.0  2JHH   \n",
       "3  3.0  dsgdb9nsd_000001           1.0           4.0  2JHH   \n",
       "4  4.0  dsgdb9nsd_000001           2.0           0.0  1JHC   \n",
       "\n",
       "   scalar_coupling_constant  \n",
       "0                 84.807602  \n",
       "1                -11.257000  \n",
       "2                -11.254800  \n",
       "3                -11.254300  \n",
       "4                 84.807404  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head().to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>atom_index</th>\n",
       "      <th>atom</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>0.0</td>\n",
       "      <td>C</td>\n",
       "      <td>-0.012698</td>\n",
       "      <td>1.085804</td>\n",
       "      <td>0.008001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>H</td>\n",
       "      <td>0.002150</td>\n",
       "      <td>-0.006031</td>\n",
       "      <td>0.001976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>2.0</td>\n",
       "      <td>H</td>\n",
       "      <td>1.011731</td>\n",
       "      <td>1.463751</td>\n",
       "      <td>0.000277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>3.0</td>\n",
       "      <td>H</td>\n",
       "      <td>-0.540815</td>\n",
       "      <td>1.447527</td>\n",
       "      <td>-0.876644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>4.0</td>\n",
       "      <td>H</td>\n",
       "      <td>-0.523814</td>\n",
       "      <td>1.437933</td>\n",
       "      <td>0.906397</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      molecule_name  atom_index atom         x         y         z\n",
       "0  dsgdb9nsd_000001         0.0    C -0.012698  1.085804  0.008001\n",
       "1  dsgdb9nsd_000001         1.0    H  0.002150 -0.006031  0.001976\n",
       "2  dsgdb9nsd_000001         2.0    H  1.011731  1.463751  0.000277\n",
       "3  dsgdb9nsd_000001         3.0    H -0.540815  1.447527 -0.876644\n",
       "4  dsgdb9nsd_000001         4.0    H -0.523814  1.437933  0.906397"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "struct.head().to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    C\n",
       "1    F\n",
       "2    H\n",
       "3    N\n",
       "4    O\n",
       "Name: atom, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "struct.atom.unique().to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "molecule_info = struct[struct.molecule_name=='dsgdb9nsd_000001']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "mol_pandas = molecule_info.to_pandas()\n",
    "atoms =  mol_pandas.atom.values.tolist()\n",
    "xyz = mol_pandas[['x','y','z']].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.2698136e-02,  1.0858041e+00,  8.0009960e-03],\n",
       "       [ 2.1504159e-03, -6.0313176e-03,  1.9761203e-03],\n",
       "       [ 1.0117308e+00,  1.4637512e+00,  2.7657481e-04],\n",
       "       [-5.4081506e-01,  1.4475266e+00, -8.7664372e-01],\n",
       "       [-5.2381361e-01,  1.4379326e+00,  9.0639728e-01]], dtype=float32)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xyz"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build molecule dataframe :\n",
    "- molecule_name, num_nodes, node_dim, atom_index, symbol, acceptor, donor, aromatic, hybridization, num_h, atomic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "structure_pandas = struct.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from atom_features import * \n",
    "import os\n",
    "import pandas as pd \n",
    "def get_nodes_info(mol_id): \n",
    "\n",
    "    molecule_structure = gb.get_group(mol_id)\n",
    "    molecule_structure = molecule_structure.sort_values(['atom_index'], ascending=True)\n",
    "    atoms =  molecule_structure.atom.values.tolist()\n",
    "    xyz = molecule_structure[['x','y','z']].values\n",
    "    molecule_rep = mol_from_axyz(atoms, xyz)\n",
    "    \n",
    "    #--- Get Atoms information from molecule representation\n",
    "    \n",
    "    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))\n",
    "    feature = factory.GetFeaturesForMol(molecule_rep)\n",
    "    num_nodes = molecule_rep.GetNumAtoms()\n",
    "    #--- Get Atoms' features \n",
    "    feature_list = []\n",
    "    for i in range(num_nodes):\n",
    "        atom = molecule_rep.GetAtomWithIdx(i)\n",
    "        feature_list.append([mol_id, num_nodes, i, atom.GetSymbol(), atom.GetIsAromatic(), atom.GetHybridization(),\n",
    "                             atom.GetTotalNumHs(), atom.GetAtomicNum()])\n",
    "    \n",
    "    # Get Acceptor / Donor flags \n",
    "    donor = np.zeros(num_nodes)\n",
    "    acceptor = np.zeros(num_nodes)\n",
    "    for t in range(0, len(feature)):\n",
    "        if feature[t].GetFamily() == 'Donor':\n",
    "            for i in feature[t].GetAtomIds():\n",
    "                donor[i] = 1\n",
    "        elif feature[t].GetFamily() == 'Acceptor':\n",
    "            for i in feature[t].GetAtomIds():\n",
    "                acceptor[i] = 1  \n",
    "                \n",
    "    arr = np.concatenate([np.array(feature_list).reshape(-1, 8), donor.reshape(-1, 1), acceptor.reshape(-1, 1)], axis=1)\n",
    "    \n",
    "    return pd.DataFrame(arr, columns=['molecule_name', 'num_nodes', 'atom_index', 'symbol',\n",
    "                                      'aromatic', 'hybridization', 'num_h', \n",
    "                                      'atomic', 'acceptor', 'donor']) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 68 ms, sys: 92 ms, total: 160 ms\n",
      "Wall time: 159 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "gb = structure_pandas.groupby('molecule_name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>num_nodes</th>\n",
       "      <th>atom_index</th>\n",
       "      <th>symbol</th>\n",
       "      <th>aromatic</th>\n",
       "      <th>hybridization</th>\n",
       "      <th>num_h</th>\n",
       "      <th>atomic</th>\n",
       "      <th>acceptor</th>\n",
       "      <th>donor</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dsgdb9nsd_000002</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>N</td>\n",
       "      <td>False</td>\n",
       "      <td>SP3</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dsgdb9nsd_000002</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>H</td>\n",
       "      <td>False</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dsgdb9nsd_000002</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>H</td>\n",
       "      <td>False</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dsgdb9nsd_000002</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>H</td>\n",
       "      <td>False</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      molecule_name num_nodes atom_index symbol aromatic hybridization num_h  \\\n",
       "0  dsgdb9nsd_000002         4          0      N    False           SP3     0   \n",
       "1  dsgdb9nsd_000002         4          1      H    False             S     0   \n",
       "2  dsgdb9nsd_000002         4          2      H    False             S     0   \n",
       "3  dsgdb9nsd_000002         4          3      H    False             S     0   \n",
       "\n",
       "  atomic acceptor donor  \n",
       "0      7      1.0   0.0  \n",
       "1      1      0.0   0.0  \n",
       "2      1      0.0   0.0  \n",
       "3      1      0.0   0.0  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "molecules = structure_pandas.molecule_name.unique()\n",
    "get_nodes_info(molecules[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 131k/131k [02:07<00:00, 1.03kit/s]  \n",
      "130772it [00:00, 330467.77it/s]\n"
     ]
    }
   ],
   "source": [
    "from parallel_process import parallel_process\n",
    "nodes_info_frames = parallel_process(molecules, get_nodes_info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "nodes_baseline = pd.concat(nodes_info_frames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>num_nodes</th>\n",
       "      <th>atom_index</th>\n",
       "      <th>symbol</th>\n",
       "      <th>aromatic</th>\n",
       "      <th>hybridization</th>\n",
       "      <th>num_h</th>\n",
       "      <th>atomic</th>\n",
       "      <th>acceptor</th>\n",
       "      <th>donor</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>C</td>\n",
       "      <td>False</td>\n",
       "      <td>SP3</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>H</td>\n",
       "      <td>False</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>H</td>\n",
       "      <td>False</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dsgdb9nsd_000001</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>H</td>\n",
       "      <td>False</td>\n",
       "      <td>S</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      molecule_name num_nodes atom_index symbol aromatic hybridization num_h  \\\n",
       "0  dsgdb9nsd_000001         5          0      C    False           SP3     0   \n",
       "1  dsgdb9nsd_000001         5          1      H    False             S     0   \n",
       "2  dsgdb9nsd_000001         5          2      H    False             S     0   \n",
       "3  dsgdb9nsd_000001         5          3      H    False             S     0   \n",
       "\n",
       "  atomic acceptor donor  \n",
       "0      6      0.0   0.0  \n",
       "1      1      0.0   0.0  \n",
       "2      1      0.0   0.0  \n",
       "3      1      0.0   0.0  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nodes_baseline.head(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "node_frame.to_csv('/rapids/notebooks/srabhi/champs-2019/input/parquet/baseline_node_frame_2.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
